#!/usr/bin/perl
#Name    	: ConvertAPDPoutput.pl
#Author  	: Morgan, Matthew
#Created 	: 01/2012
#Purpose 	: Convert APDP validated sequences into mothur and QIIME formats.  Returns fasta, names and groups files compatible with mothur, and a fasta file compatible with QIIME.
#Syntax  	: ./ConvertAPDPoutput.pl <APDP output txt file> <number of samples in table>
#Copyright (c) 2012 Commonwealth Scientific and Industrial Research Organisation (CSIRO) ABN 41 687 119 230.

#########################################################################################################################################################	
#																			#
#CSIRO Open Source Software License Agreement (GPLv3)													#
#																			#
#Copyright (c) 2010, 2012 Commonwealth Scientific and Industrial Research Organisation (CSIRO) ABN 41 687 119 230.					#
#																			#
#All rights reserved. CSIRO is willing to grant you a license to APDP on the terms of the GNU General Public License version 3				#
# as published by the Free Software Foundation (http://www.gnu.org/licenses/gpl.html), except where otherwise indicated for third party material.	#
#The following additional terms apply under clause 7 of that license:											#
#																			#
#EXCEPT AS EXPRESSLY STATED IN THIS LICENCE AND TO THE FULL EXTENT PERMITTED BY APPLICABLE LAW, THE SOFTWARE IS PROVIDED "AS-IS". CSIRO AND ITS		#
#CONTRIBUTORS MAKE NO REPRESENTATIONS, WARRANTIES OR CONDITIONS OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY REPRESENTATIONS,	#
#WARRANTIES OR CONDITIONS REGARDING THE CONTENTS OR ACCURACY OF THE SOFTWARE, OR OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,		#
#NON-INFRINGEMENT, THE ABSENCE OF LATENT OR OTHER DEFECTS, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT DISCOVERABLE.				#
#																			#
#TO THE FULL EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL CSIRO OR ITS CONTRIBUTORS BE LIABLE ON ANY LEGAL THEORY (INCLUDING, WITHOUT		#
#LIMITATION, IN AN ACTION FOR BREACH OF CONTRACT, NEGLIGENCE OR OTHERWISE) FOR ANY CLAIM, LOSS, DAMAGES OR OTHER LIABILITY HOWSOEVER INCURRED.		#
#WITHOUT LIMITING THE SCOPE OF THE PREVIOUS SENTENCE THE EXCLUSION OF LIABILITY SHALL INCLUDE: LOSS OF PRODUCTION OR OPERATION TIME, LOSS,		#
#DAMAGE OR CORRUPTION OF DATA OR RECORDS; OR LOSS OF ANTICIPATED SAVINGS, OPPORTUNITY, REVENUE, PROFIT OR GOODWILL, OR OTHER ECONOMIC LOSS;		#
#OR ANY SPECIAL, INCIDENTAL, INDIRECT, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES, ARISING OUT OF OR IN CONNECTION WITH THIS LICENCE, THE USE		#
#OF THE SOFTWARE OR THE USE OF OR OTHER DEALINGS WITH THE SOFTWARE, EVEN IF CSIRO OR ITS CONTRIBUTORS HAVE BEEN ADVISED OF THE POSSIBILITY OF		#
#SUCH CLAIM, LOSS, DAMAGES OR OTHER LIABILITY.														#
#																			#
#APPLICABLE LEGISLATION SUCH AS THE AUSTRALIAN CONSUMER LAW MAY IMPLY REPRESENTATIONS, WARRANTIES, OR CONDITIONS, OR IMPOSES OBLIGATIONS		#
#OR LIABILITY ON CSIRO OR ONE OF ITS CONTRIBUTORS IN RESPECT OF THE SOFTWARE THAT CANNOT BE WHOLLY OR PARTLY EXCLUDED, RESTRICTED OR			#
#MODIFIED "CONSUMER GUARANTEES".  IF SUCH CONSUMER GUARANTEES APPLY THEN THE LIABILITY OF CSIRO AND ITS CONTRIBUTORS IS LIMITED, TO THE FULL		#
#EXTENT PERMITTED BY THE APPLICABLE LEGISLATION.  WHERE THE APPLICABLE LEGISLATION PERMITS THE FOLLOWING REMEDIES TO BE PROVIDED FOR BREACH OF		#
#THE CONSUMER GUARANTEES THEN, AT ITS OPTION, CSIRO'S LIABILITY IS LIMITED TO ANY ONE OR MORE OF THEM:							#
#1.          THE REPLACEMENT OF THE SOFTWARE, THE SUPPLY OF EQUIVALENT SOFTWARE, OR SUPPLYING RELEVANT SERVICES AGAIN;					#
#2.          THE REPAIR OF THE SOFTWARE; 														#
#3.          THE PAYMENT OF THE COST OF REPLACING THE SOFTWARE, OF ACQUIRING EQUIVALENT SOFTWARE, HAVING THE RELEVANT SERVICES SUPPLIED AGAIN,		#
#	     OR HAVING THE SOFTWARE REPAIRED.														#
#																			#
#########################################################################################################################################################

use warnings;
use strict;

my %hash;
my $invert;
my @groupnames;
my $treads = 0;
my $nmids = $ARGV[1];
open (TAB,        "<$ARGV[0]");
open (FASTA,      ">APDP_validated_sequences_mothur.fasta");
open (NAMES,      ">APDP_validated_sequences_mothur.names");
open (GROUPS,     ">APDP_validated_sequences_mothur.groups");
open (QIIME,      ">APDP_validated_sequences_qiime.fasta");
open (QIIMEOTUS,  ">APDP_validated_sequences_qiime_otutable.txt");
open (MOTHUROTUS, ">APDP_validated_sequences_mothur_otutable.txt");

print QIIMEOTUS "\#Qiime Line\n";

while (<TAB>) {
    chomp;
    my @array   = split (/\t/,$_);
    $hash{$.}   = \@array;
    if (/^name/) {    	
    	@groupnames	= @array[6..$#array];
    }
    else {
	$invert     = scalar(@array);

	my $seqname = $array[0];
	my $seq     = $array[4];
	my $refname = $seqname . "_1";	
	
	#output to fasta file	
	print FASTA ">$refname\n$seq\n";	
	
	#output to groups file and qiime-formatted fasta file
	my @allnames= ();	
	my $count   = 0;	
	my @mids    = @array[6..$#array];
	my @keys    = (1..$nmids);
	print "@keys\n";
	
	foreach (@keys) {
		my $elem       = $_ - 1;		
		my $samplename = $groupnames[$elem];
		print "$elem\t$samplename\t";		
		my $midreads   = $mids[$elem];
		print "$midreads\n";			
		if ($midreads > 0) {		
			for (my $i=1; $i<=$midreads; $i++) {
				$count++;
				my $newname = $seqname . "_" . $count;
				push @allnames, $newname;
				print GROUPS "$newname\t$samplename\n";
			#qiime
				$treads++;
				my $qname = $samplename . "_" . $treads;
				print QIIME ">$qname\n$seq\n";			
			}
		}
		else {
			next;
		}	
	}	

	#output to names file
	my $nameslist = join (",", @allnames);
	print NAMES "$refname\t$nameslist\n";	
   
    }

    #make qiime otu table
    chomp;
    my @array   = split (/\t/,$_);
    my $name    = shift( @array );
    if ( $. == 1 ) {
	$name = "\#OTU ID";
    }
    my @samples = @array[5..($nmids+4)];
    my $line    = join ( "\t", @samples );
    print QIIMEOTUS "$name\t$line\n";

}

#make mothur otu table

my $notus = ();
for ( my $n=0;$n<$nmids+6;$n++ ) {
    if ( ( $n != 0 ) && ( $n <= 5 ) ) {
	next;
    }
    else {		
	my @new = ();	
	for my $a ( sort {$a<=>$b} keys %hash ) {
		my @curr = @{$hash{$a}};
		push @new, $curr[$n];
	}
	if ( $n == 0 ) { #will have sequence names - change for otu numbers
		my @modnums;
		my $numOTUs = ( scalar( @new ) - 1 );	
		my $count = 1;
		$notus = $numOTUs;	
		foreach( @new ) {
		    unless (/name/) {			
			my $newnum = sprintf ( "%04s", $count );			
			$_ = "Otu" . $newnum;
			push @modnums, $_;
			$count++;
		    }	
		}
		my $line = join ("\t", @modnums);
		print MOTHUROTUS "label\tGroup\tnumOTUs\t$line\n";
	}
	else {
		my $group  = shift( @new ); 		
		my $line   = join ("\t", @new);
		print MOTHUROTUS "unique\t$group\t$notus\t$line\n";
	}
     }
}

close (TAB);
close (FASTA);
close (GROUPS);
close (NAMES);
close (QIIME);
close (QIIMEOTUS);
close (MOTHUROTUS);
